Chapter 11
Lines for time series, w/ time on the x-axis
# data from: https://www.freddiemac.com/pmms/docs/historicalweeklydata.xls
library(tidyverse)
df <- read_csv("historicalmortgage.csv")
df <- df |>
pivot_longer(cols = -DATE, names_to = "TYPE", values_to = "RATE") |>
mutate(TYPE = forcats::fct_reorder2(TYPE, DATE, RATE)) # puts legend in correct order
ggplot(df, aes(DATE, RATE, color = TYPE)) +
geom_line() +
ggtitle("U.S. Mortgage Rates") +
labs (x = "", y = "percent") +
theme_grey(16) +
theme(legend.title = element_blank())ggplot(df, aes(DATE, RATE, fill = TYPE)) +
geom_col() +
ggtitle("U.S. Mortgage Rates", subtitle = "Not recommended!")library(lubridate)
df2020 <- df |>
filter(year(DATE) == 2020)
ggplot(df2020, aes(DATE, RATE, color = TYPE)) +
geom_line() +
ggtitle("U.S. Mortgage Rates")dfman <- read_csv("ManchesterByTheSea.csv")
g <- ggplot(dfman, aes(Date, Gross)) +
geom_line() +
ggtitle("Manchester by the Sea", "Daily Gross (US$), United States") +
xlab("2016-2017")
gdfman$Group <- round(1:nrow(dfman)/(nrow(dfman)/5))
g <- ggplot(dfman, aes(Date, Gross)) +
geom_point()
g +
geom_line(color = "grey50") +
geom_smooth(method = "loess", se = FALSE, lwd =1.5) +
ggtitle("Loess smoother")g +
geom_line(color = "grey50") +
geom_smooth(aes(group = Group), method = 'lm', color = "red", se = FALSE) +
geom_smooth(color = "blue", se = FALSE)g <- ggplot(dfman, aes(Date, Gross)) + geom_line() +
ggtitle("Manchester by the Sea",
"Daily Gross, United States")
saturday <- dfman |> filter(wday(Date) == 7)
g + geom_point(data = saturday, aes(Date, Gross),
color = "deeppink")ggplot(dfman, aes(Date, Gross)) +
geom_line(color = "grey30") + geom_point(size = 1) +
facet_grid(.~wday(Date, label = TRUE))ggplot(dfman, aes(Date, Gross)) +
geom_line(color = "grey30") + geom_point(size = 1) +
facet_grid(.~wday(Date, label = TRUE)) +
geom_smooth(se = FALSE)christmas <- dfman |>
filter(Date >= as.Date("2016-12-20") &
Date <= ("2017-01-03"))
ggplot(christmas, aes(Date, Gross)) +
geom_label(aes(label = wday(Date, label = TRUE))) +
geom_line(color = "cornflowerblue") +
scale_x_date(date_labels = "%b\n%d",
date_breaks = "1 day")ggplot(christmas, aes(Date, Gross/1000000)) +
geom_line(color = "cornflowerblue", lwd = 1.1) +
geom_point(color = "cornflowerblue", size = 2) +
geom_label(data = christmas,
aes(x = Date, y = Gross/1000000 + .06,
label = day(Date))) +
scale_x_date(date_labels = "%a",
date_breaks = "1 day") +
ggtitle("Manchester by the Sea",
"Chistmas Week Box Office Gross") +
labs(x = "Dec 2016 - Jan 2017",
y = "Daily Gross (in millions $US)") +
theme_grey(14)# annotate Christmas Week
start <- as.Date("2016-12-24")
end <- as.Date("2017-01-02")
g + annotate("rect", xmin = start, xmax = end,
ymin = -Inf, ymax = Inf, fill = "green",
alpha = .2) +
annotate("text", x = end + 2,
y = 1500000, label = "Dec 24 - Jan 2",
color = "green", hjust = 0) +
theme_classic()set.seed(5702)
tidydf <- data.frame(time = 1:6, y1 = round(rnorm(6, 100, 50), 2),
y2 = round(rnorm(6, 10, 5), 2)) |>
pivot_longer(cols = -time)
ggplot(tidydf, aes(time, value, col = name)) +
geom_line()Each value is divided by the first value for that group and multiplied by 100:
| time | name | value |
|---|---|---|
| 1 | y1 | 90.26 |
| 1 | y2 | 8.12 |
| 2 | y1 | 107.48 |
| 2 | y2 | 8.55 |
| 3 | y1 | 110.28 |
| 3 | y2 | 12.83 |
| 4 | y1 | 81.77 |
| 4 | y2 | 2.01 |
| 5 | y1 | 130.62 |
| 5 | y2 | 11.68 |
| 6 | y1 | 41.43 |
| 6 | y2 | 16.49 |
tidydf <- tidydf |> group_by(name) |>
mutate(index = round(100*value/value[1], 2)) |>
ungroup()
tidydf| time | name | value | index |
|---|---|---|---|
| 1 | y1 | 90.26 | 100.00 |
| 1 | y2 | 8.12 | 100.00 |
| 2 | y1 | 107.48 | 119.08 |
| 2 | y2 | 8.55 | 105.30 |
| 3 | y1 | 110.28 | 122.18 |
| 3 | y2 | 12.83 | 158.00 |
| 4 | y1 | 81.77 | 90.59 |
| 4 | y2 | 2.01 | 24.75 |
| 5 | y1 | 130.62 | 144.72 |
| 5 | y2 | 11.68 | 143.84 |
| 6 | y1 | 41.43 | 45.90 |
| 6 | y2 | 16.49 | 203.08 |
set.seed(5702)
day <- 1:31
number <- 10 * (day - 14)^2 + 2000 + rnorm(1:31, 0, 400)
df <- data.frame(day, number)
ggplot(df, aes(day, number)) +
geom_line(color = "deeppink") +
geom_point(color = "deeppink") +
scale_x_continuous(breaks = 1:31) +
scale_y_continuous(limits = c(0, 5000)) +
ggtitle("Average Motor Vehicle Inspections per Day",
subtitle = "(fake data)") +
labs(x = "day of month", y="number of inspections") +
theme(plot.title = element_text(size = 16))ggplot(df, aes(day, number)) +
geom_col() +
scale_x_continuous(breaks = 1:31) +
ggtitle("Average Motor Vehicle Inspections per Day") +
labs(x = "day of month", y="number of inspections") +
theme(plot.title = element_text(size = 16))# read file
mydat <- read_csv("WA_Sales_Products_2012-14.csv") |>
mutate(Revenue = Revenue/1000000)
# convert Quarter to a single numeric value Q
mydat$Q <- as.numeric(substr(mydat$Quarter, 2, 2))
# convert Q to end-of-quarter date
mydat$Date <- as.Date(paste0(mydat$Year, "-",
as.character(mydat$Q*3),
"-30"))
# Check that dates look ok
# unique(mydat$Date)Methoddata <- mydat |> group_by(Date, `Order method type`) |>
summarize(Revenue = sum(Revenue))
g <- ggplot(Methoddata, aes(Date, Revenue,
color = `Order method type`)) +
geom_line(aes(group = `Order method type`)) +
scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
date_breaks = "6 months", date_labels = "%b %Y") +
ylab("Revenue in mil $")
gNo2013 <- Methoddata |> filter(year(Date) != 2013)
g <- ggplot(No2013, aes(Date, Revenue,
color = `Order method type`)) +
geom_line(aes(group = `Order method type`)) +
scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
date_breaks = "6 months", date_labels = "%b %Y") +
ylab("Revenue in mil $")
g(set missing values to NA)
Methoddata$Date[year(Methoddata$Date)==2013] <- NA
g <- ggplot(Methoddata, aes(Date, Revenue,
color = `Order method type`)) +
geom_path(aes(group = `Order method type`)) +
scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
date_breaks = "6 months", date_labels = "%b %Y") +
ylab("Revenue in mil $")
g